"""
Created on 2021-10-12 23:48:01
---------
@summary:测试爬取
---------
@author: 大龙
"""


from re import U
import feapder
Spider_Page = 20  # number of list pages to crawl


class SpiderQsbk(feapder.AirSpider):
    """Crawl article titles and detail-page content from qiushibaike.com.

    Flow: ``start_requests`` yields list-page requests, ``parse`` extracts
    the title and detail URL of each recommended article, and
    ``parse_detail`` prints the detail page's url/title/content.
    """

    def start_requests(self):
        """Yield one request per list page, pages 1..Spider_Page.

        Fix: the original hard-coded ``range(1, 20)``, which ignored the
        module-level ``Spider_Page`` constant and crawled only 19 pages
        instead of the intended 20.
        """
        for page in range(1, Spider_Page + 1):
            yield feapder.Request(
                "https://www.qiushibaike.com/8hr/page/{}/".format(page))

    def parse(self, request, response):
        """Parse a list page: extract each article's title and detail URL."""
        # Select every recommended-article link on the page.
        article_list = response.xpath('//a[@class="recmd-content"]')
        for article in article_list:
            title = article.xpath("./text()").extract_first()
            # No manual urljoin needed: feapder resolves relative hrefs
            # against the response URL automatically.
            url = article.xpath("./@href").extract_first()
            # ``title`` is attached to the request and read back in
            # parse_detail via ``request.title``.
            yield feapder.Request(url, callback=self.parse_detail, title=title)

    def parse_detail(self, request, response):
        """Parse an article detail page and print its url/title/content."""
        url = response.url
        title = request.title  # forwarded from the list-page request
        content = response.xpath(
            'string(//div[@class="content"])').extract_first()
        print("url", url)
        print("title", title)
        print("content", content)


if __name__ == "__main__":
    SpiderQsbk().start()
